#!/bin/bash

# Launch a FastChat vLLM worker for the QwQ-32B-Preview checkpoint.
#
# With no environment overrides and no arguments, this runs the same command
# line as before. Each setting can be overridden through the environment:
#   QWQ_GPU_IDS        value exported as CUDA_VISIBLE_DEVICES   (default: 0)
#   QWQ_MODEL_PATH     value for --model-path
#   QWQ_MODEL_NAMES    value for --model-names                  (default: QwQ)
#   QWQ_MAX_MODEL_LEN  value for --max-model-len                (default: 8192)
#   QWQ_WORKER_PORT    port for --port AND --worker-address     (default: 21011)
#   QWQ_WORKER_HOST    host part of --worker-address            (default: 0.0.0.0)
# Any command-line arguments given to this script are appended to the worker
# invocation.

set -euo pipefail

readonly gpu_ids="${QWQ_GPU_IDS:-0}"
readonly model_path="${QWQ_MODEL_PATH:-/GLOBALFS/nudt_dwfeng_1/llm/Qwen/QwQ-32B-Preview}"
readonly model_names="${QWQ_MODEL_NAMES:-QwQ}"
readonly max_model_len="${QWQ_MAX_MODEL_LEN:-8192}"
readonly worker_port="${QWQ_WORKER_PORT:-21011}"
# NOTE(review): 0.0.0.0 is a wildcard address rather than a routable host;
# presumably this only works when whatever contacts the worker runs on the
# same machine. Set QWQ_WORKER_HOST to a reachable name otherwise -- confirm.
readonly worker_host="${QWQ_WORKER_HOST:-0.0.0.0}"

# The original always pinned the worker to GPU 0 regardless of any inherited
# CUDA_VISIBLE_DEVICES, so the override uses its own variable name.
export CUDA_VISIBLE_DEVICES="${gpu_ids}"

# exec replaces this shell with the Python process, so signals sent to the
# script's PID reach the worker directly instead of killing only the wrapper
# shell and leaving the worker running.
# -u keeps Python's stdout/stderr unbuffered so log lines appear immediately.
# --port and --worker-address share ${worker_port} so they cannot drift apart.
exec python -u -m fastchat.serve.vllm_worker \
  --model-path "${model_path}" \
  --port "${worker_port}" \
  --model-names "${model_names}" \
  --max-model-len "${max_model_len}" \
  --worker-address "http://${worker_host}:${worker_port}" \
  "$@"